library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

3.2.4 Exercises

# 1
ggplot(data = mpg)

# We see an empty canvas: no aesthetics are mapped and no geom layer is added

# 2
nrow(mpg)
## [1] 234
ncol(mpg)
## [1] 11

# 3
# drv is the type of drive train of the car

# 4
ggplot(data = mpg, mapping = aes(x = cyl, y = hwy)) +
  geom_point()

# 5
ggplot(data = mpg, mapping = aes(x = drv, y = class)) +
  geom_point()

# There are many points in the same positions, so not all data points are visible

3.3.1 Exercises

# 1
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = "blue"))

# Anything placed inside aes() is treated as data to map, not as a setting
# Here the string "blue" is recycled into a constant variable,
#   so ggplot sees a single category and gives every point the same
#   (default palette) color, plus a legend entry for that category

# To actually paint the points blue, set color outside of aes()
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(color = "blue")

# Note also that the legend no longer appears to indicate the color definitions
#   there are no implied meanings to the colors now, the points are simply blue

# 2
# categoricals: manufacturer, model, trans, drv, fl, class
# continuous: displ, year, cyl, cty, hwy

# displ, year, cyl could all be considered categorical as well

# Print the tibble so the column types can be checked against the lists above
mpg
## # A tibble: 234 x 11
##    manufacturer model    displ  year   cyl trans   drv     cty   hwy fl   
##    <chr>        <chr>    <dbl> <int> <int> <chr>   <chr> <int> <int> <chr>
##  1 audi         a4        1.80  1999     4 auto(l… f        18    29 p    
##  2 audi         a4        1.80  1999     4 manual… f        21    29 p    
##  3 audi         a4        2.00  2008     4 manual… f        20    31 p    
##  4 audi         a4        2.00  2008     4 auto(a… f        21    30 p    
##  5 audi         a4        2.80  1999     6 auto(l… f        16    26 p    
##  6 audi         a4        2.80  1999     6 manual… f        18    26 p    
##  7 audi         a4        3.10  2008     6 auto(a… f        18    27 p    
##  8 audi         a4 quat…  1.80  1999     4 manual… 4        18    26 p    
##  9 audi         a4 quat…  1.80  1999     4 auto(l… 4        16    25 p    
## 10 audi         a4 quat…  2.00  2008     4 manual… 4        20    28 p    
## # ... with 224 more rows, and 1 more variable: class <chr>
# str() lists every column with its storage type, including class,
#   which the truncated tibble print above does not show
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
# 3

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = cty))

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(size = cty))

# Kept commented out: mapping the continuous cty to shape raises an error
# ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
#   geom_point(mapping = aes(shape = cty))

# When mapping a continuous variable to color or size, ggplot handles it well
#   ggplot uses a continuous scale for these variables rather than discrete levels
#   color is not assigned arbitrarily, but follows a light/dark gradient

# Mapping a continuous variable to a shape throws an error
#   there is no good way to present continuous data as shapes


# 4
ggplot(
  data = mpg,
  mapping = aes(x = hwy, y = hwy, size = hwy, color = hwy)
) +
  geom_point()

# The variables just show up in multiple aesthetics

# 5
?geom_point

# shape 21 is a circle with separate border (color) and interior (fill)
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, fill = cty)) +
  geom_point(color = "blue", shape = 21, stroke = 2)

# Stroke sets the width of the line that draws the point geoms


# 6

ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = displ < 5)) +
  geom_point()

# aes() evaluates the expression displ < 5 for every row of the data
#   In this case we get a logical vector, and the points are colored by TRUE/FALSE

3.5.1 Exercises

# 1
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ cty)

# ggplot treats the continuous variable as categorical: one panel per distinct value


# 2

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

# The same drv/cyl combinations, shown as points instead of panels
ggplot(data = mpg, mapping = aes(x = drv, y = cyl)) +
  geom_point()

# The empty cells mean there are no data points with those combinations of categories
#   e.g. there are no rear-wheel drive (drv = r) cars with 4 cylinders


# 3

# facet_grid with drv on the rows only
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

# facet_grid with cyl on the columns only
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

# The facet_wrap counterparts of the two grids above
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ drv, nrow = 3)

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ cyl, nrow = 1)

# The . is a placeholder meaning "no faceting variable" on that dimension:
#   drv ~ . facets by rows only, . ~ cyl facets by columns only
#   The results are effectively the same as facet_wrap, but with cosmetic differences


# 4

# One panel per class
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

# A single panel, class shown by color
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point()

# Color Plot:
#   Easier to compare points from different classes
#   Easier to understand which points are outliers, which classes are unusual

# Facet plot:
#   Easier to see trends within classes

# As the data points increase, the color plot will become difficult to interpret


# 5 
# nrow and ncol determine the number of rows/cols in a facet_wrap plot
# These are implied by the dimensions of the variables given in facet_grid


# 6
# Screens are typically wider than they are tall, so putting the variable with
#   more levels in the columns will probably look better

3.6.1 Exercises

# 1
?geom_line
?geom_boxplot
?geom_histogram
?geom_area


# 2
# Scatter plot of hwy ~ displ colored by the drv
#   also a line for each drv showing the mean trend, without error bars
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# 3
# show.legend = FALSE suppresses the legend; if you remove it, it returns to its default value NA,
#   which draws a legend whenever an aesthetic is mapped
#   Hadley suppressed the legend earlier because the graphs were already very small

# 4
# It's a TRUE/FALSE that determines whether the confidence interval band around the smooth line is drawn

# 5
# They are equivalent
# The data and mapping declared in the initial ggplot statement passes down to the geoms

# 6
# Plain scatter with one overall smooth
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# The grouping variable is drv, which you only know from reading the chapter
ggplot(mpg, aes(x = displ, y = hwy, group = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# The group aesthetic could equally be set on geom_smooth() alone,
#   since it makes no difference for geom_point()

# Points and smooths both colored by drv
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# Points colored by drv, one overall smooth
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# Points colored by drv, one smooth per drv told apart by linetype
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess'

# Filled circles (shape 21) with a thick white border
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(fill = drv), shape = 21, color = "white", stroke = 2)

3.7.1 Exercises

# 1
?stat_summary
# geom = "pointrange"
?geom_pointrange

# Same plot as the chapter's stat_summary() example, written via the geom
# NOTE: ggplot2 >= 3.3.0 renames fun.y / fun.ymin / fun.ymax to
#   fun / fun.min / fun.max; the old names are kept here for ggplot2 2.2.1
ggplot(data = diamonds, mapping = aes(x = cut, y = depth)) +
  geom_pointrange(
    stat = "summary",
    fun.y = median,
    fun.ymin = min,
    fun.ymax = max
  )

# 2
?geom_col

# I believe that geom_col() is equivalent to geom_bar(stat = "identity")
# There might be more to it than that

# 3
# Not really sure of the answer here. The geom-stat pairs always have the same default position?

# 4
?stat_smooth
# This is basically just asking to look at the help

# 5 
# The two problem charts from the exercise
ggplot(data = diamonds, mapping = aes(x = cut, y = ..prop..)) +
  geom_bar()

ggplot(data = diamonds, mapping = aes(x = cut, fill = color, y = ..prop..)) +
  geom_bar()

# In the charts above, the proportions are computed within each individual category

# group = 1 makes the proportions relative to the whole data set
ggplot(data = diamonds, mapping = aes(x = cut, y = ..prop.., group = 1)) +
  geom_bar()

# Plain stacked counts, for comparison
ggplot(data = diamonds, mapping = aes(x = cut, fill = color)) +
  geom_bar()

# Adding group = 1 to the fill = color chart does not work: with every row in
#   a single group, the bars can no longer be split by color
# Instead keep the color groups and divide each count by the overall total,
#   so the stacked height of each bar is that cut's share of all diamonds
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color, y = ..count.. / sum(..count..))
  )

3.8.1 Exercises

# 1
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point()

# Many points overlap exactly; jittering makes them visible
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter()

# 2
?geom_jitter
# height and width

# 3
?geom_count

ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_count()

# geom_count() shows overlapping points as larger points
#   this seems generally better than geom_jitter to me
#   I'm not sure when geom_jitter is better


# 4
?geom_boxplot

# the default position is dodge
ggplot(data = mpg, mapping = aes(x = drv, y = cty)) +
  geom_boxplot()

# Adding a group within each x value triggers the dodge warning
ggplot(data = mpg, mapping = aes(x = drv, y = cty, group = class)) +
  geom_boxplot()
## Warning: position_dodge requires non-overlapping x intervals

# Here we have the boxplots of city mpg grouped by the class variable
#   It is trying to make a box for each combination of drv+class
#   It throws a warning because the boxes overlap horizontally, which is ugly

3.9.1 Exercises

# 1
# stacked bar: a single bar (constant x) split by color, full width
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = factor(1), fill = color), width = 1)

# pie chart: the same single stacked bar in polar coordinates
#   theta = "y" maps the stacked counts to the angle, so each color is a slice
#   (plain coord_polar() maps x to the angle, which does not give a pie)
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = factor(1), fill = color), width = 1) +
  coord_polar(theta = "y")

# 2 
?labs

# labs() allows for manual labelling of axes and legends, as well as the plot title, subtitle and caption


 # 3
?coord_map
?coord_quickmap
# coord_map adjusts plotted latitude/longitude data to account for the spherical shape of the earth
#   the distance covered by a degree of longitude is much larger at the equator than near the poles
#   coord_quickmap uses an approximation that is much faster but is less accurate, especially near the poles

# 4
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed()

# coord_fixed forces a specific ratio of y~x in the graph
#   the default ratio = 1 doesn't show us much in this graph
#   to understand better, set the ratio to 5 or to 0.2
#   By default, ggplot chooses a coordinate ratio based upon the ranges of x and y

# abline just adds a straight line to the graph
#   This is useful for annotations and calling out trends
#   No parameters are passed so the abline defaults to y = x

# One unit of y drawn five times as long as one unit of x
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed(ratio = 5)

# One unit of y drawn a fifth as long as one unit of x
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed(ratio = 0.2)